﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace LDAToyExample
{
    /// <summary>
    /// A word paired with a probability. Instances order by descending
    /// probability, so sorting an array of results puts the most probable
    /// word first.
    /// </summary>
    class Result : IComparable, IComparable<Result>
    {
        readonly double prob;
        readonly string word;

        /// <summary>Probability attached to <see cref="Word"/>.</summary>
        public double Prob
        {
            get { return prob; }
        }

        /// <summary>The word this result describes.</summary>
        public string Word
        {
            get { return word; }
        }

        /// <summary>Creates a result for <paramref name="word"/> with the given probability.</summary>
        /// <param name="prob">Probability of the word.</param>
        /// <param name="word">The word itself.</param>
        public Result(double prob, string word)
        {
            this.prob = prob;
            this.word = word;
        }

        /// <summary>
        /// Compares two results so that the one with the larger probability sorts first.
        /// </summary>
        /// <param name="other">Result to compare with; may be null.</param>
        /// <returns>
        /// -1 when this probability is larger, 1 when it is smaller or
        /// <paramref name="other"/> is null, 0 otherwise.
        /// </returns>
        public int CompareTo(Result other)
        {
            // IComparable contract: every instance compares greater than null.
            if (other == null)
                return 1;

            if (prob > other.prob)
                return -1;
            else if (prob < other.prob)
                return 1;
            else
                return 0;
        }

        /// <summary>
        /// Non-generic comparison; same ordering as <see cref="CompareTo(Result)"/>.
        /// </summary>
        /// <param name="obj">A <see cref="Result"/> or null.</param>
        /// <returns>-1, 0 or 1 as described for <see cref="CompareTo(Result)"/>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="obj"/> is neither null nor a <see cref="Result"/>.
        /// </exception>
        public int CompareTo(Object obj)
        {
            if (obj == null)
                return 1;

            Result other = obj as Result;
            if (other == null)
                throw new ArgumentException("Object must be of type Result.", "obj");

            return CompareTo(other);
        }
    }

    /// <summary>
    /// Toy Latent Dirichlet Allocation example: assigns a topic to every token
    /// of six tiny documents by Gibbs sampling, then prints the estimated
    /// topic-word and document-topic distributions.
    /// </summary>
    class Program
    {
        /// <summary>Upper bound on the number of words listed for each topic.</summary>
        const int TopWordsPerTopic = 5;

        /// <summary>
        /// Builds the corpus, runs the sampler and writes the results to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string[] vocabArray = { "networks", "learning", "bayesian", "graphical", "models", "information", "retrieval", "data", "mining" };

            // Map every vocabulary word to its position in vocabArray; V ends up
            // as the vocabulary size.
            Dictionary<string, int> vocabIndex = new Dictionary<string, int>();
            int V = 0;
            foreach (string word in vocabArray)
            {
                vocabIndex[word] = V++;
            }

            string[] doc = {"learning bayesian networks", "graphical models", "learning graphical models", "information retrieval", "data mining", "data information retrieval"};

            // Encode each document as an array of vocabulary indices.
            int M = doc.Length;
            int[][] docs = new int[M][];
            for (int m = 0; m < M; m++)
            {
                string[] tokens = doc[m].Split(' ');
                docs[m] = new int[tokens.Length];
                for (int n = 0; n < tokens.Length; n++)
                {
                    docs[m][n] = vocabIndex[tokens[n]];
                }
            }

            int K = 2;      // number of topics
            int alpha = 1;  // pseudo-count added to every document-topic count
            int beta = 1;   // pseudo-count added to every topic-word count

            // Count tables; C# zero-initialises new arrays, so no explicit reset is needed.
            int[,] nmk = new int[M, K];     // tokens of document m assigned to topic k
            int[] nm = new int[M];          // tokens in document m
            int[,] nkv = new int[K, V];     // occurrences of word v assigned to topic k
            int[] nk = new int[K];          // tokens assigned to topic k
            int[][] zassign = new int[M][]; // current topic of every token

            Random rand = new Random();

            // Start from a uniformly random topic for every token.
            for (int m = 0; m < M; m++)
            {
                zassign[m] = new int[docs[m].Length];
                for (int n = 0; n < docs[m].Length; n++)
                {
                    int z = rand.Next(0, K);
                    nmk[m, z]++;
                    nm[m]++;
                    nkv[z, docs[m][n]]++;
                    nk[z]++;

                    zassign[m][n] = z;
                }
            }

            double[] p = new double[K];
            int iterations = 1000;
            for (int iteration = 0; iteration < iterations; iteration++)
            {
                for (int m = 0; m < M; m++)
                {
                    for (int n = 0; n < docs[m].Length; n++)
                    {
                        int w = docs[m][n];
                        int z = zassign[m][n];

                        // Take the current token out of the counts so that it
                        // does not influence its own resampling.
                        nmk[m, z]--;
                        nm[m]--;
                        nkv[z, w]--;
                        nk[z]--;

                        // Running (cumulative) sum of the unnormalised topic weights.
                        double running = 0.0;
                        for (int k = 0; k < K; k++)
                        {
                            running += (double)(nkv[k, w] + beta) / (double)(nk[k] + V * beta) * (double)(nmk[m, k] + alpha) / (double)(nm[m] + K * alpha);
                            p[k] = running;
                        }

                        z = SampleFromCumulative(p, rand);

                        // Put the token back under its newly drawn topic.
                        nmk[m, z]++;
                        nm[m]++;
                        nkv[z, w]++;
                        nk[z]++;

                        zassign[m][n] = z;
                    }
                }
            }

            // Turn the final counts into smoothed probability estimates.
            double[,] phi = new double[K, V];
            double[,] theta = new double[M, K];
            Result[][] topicWords = new Result[K][];
            for (int k = 0; k < K; k++)
            {
                topicWords[k] = new Result[V];
                for (int v = 0; v < V; v++)
                {
                    phi[k, v] = (double)(nkv[k, v] + beta) / (double)(nk[k] + V * beta);
                    topicWords[k][v] = new Result(phi[k, v], vocabArray[v]);
                }
                // Result orders by descending probability, so index 0 is the top word.
                Array.Sort(topicWords[k]);

                for (int m = 0; m < M; m++)
                {
                    theta[m, k] = (double)(nmk[m, k] + alpha) / (double)(nm[m] + K * alpha);
                }
            }

            // Never ask for more words than the vocabulary holds.
            int shown = Math.Min(TopWordsPerTopic, V);
            for (int k = 0; k < K; k++)
            {
                Console.WriteLine("Topic {0}", k);
                for (int rank = 0; rank < shown; rank++)
                {
                    Console.WriteLine("{0} & {1} & {2}", rank + 1, topicWords[k][rank].Word, topicWords[k][rank].Prob);
                }
                Console.WriteLine();
            }

            for (int m = 0; m < M; m++)
            {
                Console.Write("{0}", doc[m]);
                for (int k = 0; k < K; k++)
                {
                    Console.Write("\t{0}", theta[m, k]);
                }
                Console.WriteLine();
            }

            Console.WriteLine("Press any key");
            Console.ReadKey();
        }

        /// <summary>
        /// Draws an index with probability proportional to the increments of a
        /// cumulative, unnormalised weight array.
        /// </summary>
        /// <param name="cumulative">Non-empty array of running weight totals.</param>
        /// <param name="rand">Source of the single uniform draw consumed by this call.</param>
        /// <returns>
        /// The first index whose cumulative weight exceeds the scaled draw; the last
        /// index if none does, so the result is always a valid index.
        /// </returns>
        private static int SampleFromCumulative(double[] cumulative, Random rand)
        {
            int last = cumulative.Length - 1;

            // Scale the draw by the total weight because the weights are not normalised.
            double u = rand.NextDouble() * cumulative[last];

            for (int k = 0; k < last; k++)
            {
                if (cumulative[k] > u)
                    return k;
            }
            return last;
        }
    }
}
